# Grouped bar chart: number of children per BMI-percentile bin, with boys
# and girls drawn next to each other for every bin.
fig, ax = plt.subplots(figsize=(12, 8))
index = np.arange(len(bins))
bar_width = 0.4
opacity = 0.8

# Legend entries show the summed bin counts (n) and the mean of the raw
# `boys` / `girls` values (mu). Raw strings keep the mathtext `$\mu$`
# intact without relying on the invalid `\m` escape sequence.
boys_label = r'Boys (n={0:,.0f}; $\mu$={1:,.2f})'.format(np.sum(boys_bins), np.mean(boys))
girls_label = r'Girls (n={0:,.0f}; $\mu$={1:,.2f})'.format(np.sum(girls_bins), np.mean(girls))

bar1 = plt.bar(index, boys_bins, bar_width, alpha=opacity, label=boys_label)
# The girls' bars are shifted right by one bar width so the two series do
# not overlap.
bar2 = plt.bar(index + bar_width, girls_bins, bar_width, alpha=opacity, label=girls_label)

# plt.xlabel('Obesity Percentiles', fontsize=14)
plt.ylabel('Number of Children', fontsize=14)
plt.title('BMI Percentile Distribution at 4.5 to 5.5 years - No Exclusions', fontsize=20)
# plt.xticks(index + bar_width, ['{0:.1f}%'.format(b * 100) for b in bins])
plt.xticks(index, bin_names, rotation=30)

# Dashed horizontal grid lines, drawn underneath the bars.
ax.yaxis.grid(linestyle='--')
ax.set_axisbelow(True)

plt.legend(fontsize=12)
plt.tight_layout()
# plt.savefig('../outputs_age_analyses20180221/no_exclusions_bmi_percentiles.png', dpi=96)
plt.show()
# Draw one growth curve for each of the first 16 entries of `keys`
# (positions 0 through 15), always with hide_mrn=True.
for key_pos in range(16):
    train.plot_growth_curve(d1, None, keys[key_pos], hide_mrn=True)
# ROC curves restricted to models whose title contains `modelix`, leaving
# out any title that mentions 'randomforest' or 'gradientboost'.
modelix = 'BMI'
plt.figure(figsize=(9, 9))
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    if modelix not in curve_title:
        continue
    if 'randomforest' in curve_title or 'gradientboost' in curve_title:
        continue
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    one_minus_spec = 1 - np.array(spec_total[ix])
    sensitivity = np.array(recall_total[ix])
    plt.plot(
        one_minus_spec,
        sensitivity,
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.legend(fontsize=10)
plt.xlabel('1 - Specificity', fontsize=12)
plt.ylabel('Sensitivity', fontsize=12)
plt.axis('equal')
plt.title('ROC Curve: Obesity Predicted at 5 years - "Vital: BMI-latest"', fontsize=14)
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
# Precision-recall curves for models whose title contains `modelix`,
# leaving out any title that mentions 'randomforest' or 'gradientboost'.
# NOTE(review): the legend prints auc_list[ix][0], the same value the ROC
# plots label as AUC - confirm that is the intended metric for a PR plot.
plt.figure(figsize=(9, 9))
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    skipped_model = 'randomforest' in curve_title or 'gradientboost' in curve_title
    if modelix not in curve_title or skipped_model:
        continue
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    plt.plot(
        recall_total[ix],
        prec_total[ix],
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=8)
plt.axis('equal')
plt.title('Precision-Recall Curve: Obesity Predicted at 5 years - "Vital: BMI-latest"', fontsize=14)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# ROC curves for every model whose title contains none of the markers
# listed below.
plt.figure(figsize=(9, 9))
skip_markers = ('w/o vitals', 'Wt', 'no_maternal', 'w/o exclusions', 'BMI', 'gradientboost')
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    if any(marker in curve_title for marker in skip_markers):
        continue
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    one_minus_spec = 1 - np.array(spec_total[ix])
    sensitivity = np.array(recall_total[ix])
    plt.plot(
        one_minus_spec,
        sensitivity,
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.legend(fontsize=9)
plt.xlabel('1 - Specificity', fontsize=14)
plt.ylabel('Sensitivity', fontsize=14)
plt.axis('equal')
plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
# Precision-recall curves for every model whose title contains none of the
# markers listed below.
# NOTE(review): the legend prints auc_list[ix][0], the same value the ROC
# plots label as AUC - confirm that is the intended metric for a PR plot.
plt.figure(figsize=(9, 9))
skip_markers = ('w/o vitals', 'Wt', 'no_maternal', 'w/o exclusions', 'BMI', 'gradientboost')
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    if any(marker in curve_title for marker in skip_markers):
        continue
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    plt.plot(
        recall_total[ix],
        prec_total[ix],
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=8)
plt.axis('equal')
plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# Side-by-side precision-recall panels (girls on the left, boys on the
# right) for models whose title contains none of the markers listed below.
plt.figure(figsize=(18, 9))
skip_markers = ('w/o vitals', 'Wt', 'no_maternal', 'w/o exclusions', 'BMI', 'gradientboost')

# Left panel: only titles containing 'girls'.
plt.subplot(1, 2, 1)
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    if any(marker in curve_title for marker in skip_markers):
        continue
    if 'girls' not in curve_title:
        continue
    plt.plot(
        recall_total[ix],
        prec_total[ix],
        linestyle='-',
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=8, loc=8)
plt.axis('equal')
plt.title('Girls Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()

# Right panel: only titles containing 'boys'.
plt.subplot(1, 2, 2)
for ix in range(len(prec_total)):
    curve_title = titles_total[ix]
    if any(marker in curve_title for marker in skip_markers):
        continue
    if 'boys' not in curve_title:
        continue
    plt.plot(
        recall_total[ix],
        prec_total[ix],
        linestyle='-',
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[ix][0]),
    )
plt.legend(fontsize=8, loc=8)
plt.axis('equal')
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.title('Boys Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# ROC curves for the models referenced by `top_ix`. Element 2 of each
# top_ix entry is used as the index into the *_total lists.
plt.figure(figsize=(9, 9))
for l in top_ix:
    model_pos = l[2]
    curve_title = titles_total[model_pos]
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    one_minus_spec = 1 - np.array(spec_total[model_pos])
    sensitivity = np.array(recall_total[model_pos])
    plt.plot(
        one_minus_spec,
        sensitivity,
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[model_pos][0]),
    )
plt.legend(fontsize=10)
plt.xlabel('1 - Specificity', fontsize=14)
plt.ylabel('Sensitivity', fontsize=14)
plt.axis('equal')
plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
# Precision-recall curves for the models referenced by `top_ix`. Element 2
# of each top_ix entry is used as the index into the *_total lists.
plt.figure(figsize=(9, 9))
for l in top_ix:
    model_pos = l[2]
    curve_title = titles_total[model_pos]
    # Solid line for titles containing 'girls', dashed for all others.
    curve_style = '-' if 'girls' in curve_title else '--'
    plt.plot(
        recall_total[model_pos],
        prec_total[model_pos],
        linestyle=curve_style,
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[model_pos][0]),
    )
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=10)
plt.axis('equal')
plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# Side-by-side precision-recall panels (girls left, boys right) for the
# models referenced by `top_ix`. Element 2 of each top_ix entry is used as
# the index into the *_total lists.
plt.figure(figsize=(20, 10))

# Left panel: only titles containing 'girls'.
plt.subplot(1, 2, 1)
for l in top_ix:
    model_pos = l[2]
    curve_title = titles_total[model_pos]
    if 'girls' not in curve_title:
        continue
    plt.plot(
        recall_total[model_pos],
        prec_total[model_pos],
        linestyle='-',
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[model_pos][0]),
    )
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=10, loc=8)
plt.axis('equal')
plt.title('Girls Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()

# Right panel: only titles containing 'boys'.
plt.subplot(1, 2, 2)
for l in top_ix:
    model_pos = l[2]
    curve_title = titles_total[model_pos]
    if 'boys' not in curve_title:
        continue
    plt.plot(
        recall_total[model_pos],
        prec_total[model_pos],
        linestyle='-',
        label=curve_title + ' - AUC={:0.2f}'.format(auc_list[model_pos][0]),
    )
plt.legend(fontsize=10, loc=8)
plt.axis('equal')
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.title('Boys Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# Tabulate every model: its position, its title, and the two values stored
# per auc_list entry (columns 'auc' and 'auc ste' - presumably the AUC and
# its standard error; confirm against where auc_list is produced).
df_l = []
headers = ['ix', 'title', 'auc', 'auc ste']
for ix, (auc, title) in enumerate(zip(auc_list, titles_total)):
    df_l.append([ix, title, auc[0], auc[1]])
df = pd.DataFrame(df_l, columns=headers)

# For every age / sex combination, keep the highest-AUC model among the
# titles that contain 'w/o exclusions' and do not contain 'no_maternal'.
# Each top_ix entry is [age, sex, model position, shortened title].
# NOTE: the plotting cells that read top_ix appear earlier in this file, so
# this cell has to be executed before them.
top_ix = []
for age in ['@ 6', '@ 12', '@ 18', '@ 24', '@ 36', '@ 48']:
    for g in ['boys', 'girls']:
        # regex=False: the fragments are literal substrings, not patterns.
        selected = (
            df['title'].str.contains(age, regex=False)
            & df['title'].str.contains(g, regex=False)
            & df['title'].str.contains('w/o exclusions', regex=False)
            & ~df['title'].str.contains('no_maternal', regex=False)
        )
        filtered = df[selected].sort_values(by='auc', axis=0, ascending=False)
        print(filtered.head(3))
        if filtered.empty:
            # Nothing matched this combination; skip it instead of failing
            # on the positional lookup below.
            print('No matching model for {0} / {1}; skipped.'.format(age, g))
            continue
        best_ix = filtered['ix'].iloc[0]
        name = filtered['title'].iloc[0].replace(" maternal w/o exclusions - model: ", " ")
        top_ix.append([age, g, best_ix, name])
ix title auc auc ste
21 21 boys maternal w/o exclusions - model: randomfo... 0.642392 0.018267
5 5 boys maternal w/o exclusions - model: lasso@ 6... 0.633844 0.016899
37 37 boys maternal w/o exclusions - model: gradient... 0.630358 0.015649
ix title auc auc ste
13 13 girls maternal w/o exclusions - model: lasso@ ... 0.692385 0.020549
29 29 girls maternal w/o exclusions - model: randomf... 0.677942 0.015822
45 45 girls maternal w/o exclusions - model: gradien... 0.669962 0.017309
ix title auc auc ste
85 85 boys maternal w/o exclusions - model: gradient... 0.680082 0.014816
69 69 boys maternal w/o exclusions - model: randomfo... 0.672767 0.011746
53 53 boys maternal w/o exclusions - model: lasso@ 1... 0.670870 0.010762
ix title auc auc ste
77 77 girls maternal w/o exclusions - model: randomf... 0.726243 0.013140
93 93 girls maternal w/o exclusions - model: gradien... 0.726203 0.014749
61 61 girls maternal w/o exclusions - model: lasso@ ... 0.720417 0.017287
ix title auc \
133 133 boys maternal w/o exclusions - model: gradient... 0.721950
117 117 boys maternal w/o exclusions - model: randomfo... 0.720265
101 101 boys maternal w/o exclusions - model: lasso@ 1... 0.719445
auc ste
133 0.010960
117 0.012022
101 0.011076
ix title auc \
125 125 girls maternal w/o exclusions - model: randomf... 0.779235
109 109 girls maternal w/o exclusions - model: lasso@ ... 0.768030
141 141 girls maternal w/o exclusions - model: gradien... 0.759643
auc ste
125 0.008353
109 0.012006
141 0.013372
ix title auc \
181 181 boys maternal w/o exclusions - model: gradient... 0.767275
149 149 boys maternal w/o exclusions - model: lasso@ 2... 0.758447
165 165 boys maternal w/o exclusions - model: randomfo... 0.757014
auc ste
181 0.011424
149 0.009043
165 0.011258
ix title auc \
189 189 girls maternal w/o exclusions - model: gradien... 0.806009
157 157 girls maternal w/o exclusions - model: lasso@ ... 0.805645
173 173 girls maternal w/o exclusions - model: randomf... 0.803393
auc ste
189 0.010217
157 0.009200
173 0.005738
ix title auc \
197 197 boys maternal w/o exclusions - model: lasso@ 3... 0.820958
229 229 boys maternal w/o exclusions - model: gradient... 0.819836
213 213 boys maternal w/o exclusions - model: randomfo... 0.818687
auc ste
197 0.011484
229 0.009750
213 0.006361
ix title auc \
205 205 girls maternal w/o exclusions - model: lasso@ ... 0.885750
221 221 girls maternal w/o exclusions - model: randomf... 0.878544
237 237 girls maternal w/o exclusions - model: gradien... 0.872451
auc ste
205 0.010014
221 0.006395
237 0.003827
ix title auc \
277 277 boys maternal w/o exclusions - model: gradient... 0.895406
261 261 boys maternal w/o exclusions - model: randomfo... 0.893322
245 245 boys maternal w/o exclusions - model: lasso@ 4... 0.887492
auc ste
277 0.006081
261 0.006928
245 0.007179
ix title auc \
253 253 girls maternal w/o exclusions - model: lasso@ ... 0.925774
285 285 girls maternal w/o exclusions - model: gradien... 0.921913
269 269 girls maternal w/o exclusions - model: randomf... 0.914098
auc ste
253 0.006551
285 0.003646
269 0.003308
# ROC curves for the `*_24` result lists. The line style encodes the title:
# solid for 'original', dotted for 'inclusion mod', dashed otherwise
# (matched case-insensitively).
plt.figure(figsize=(9, 9))
for i, title in enumerate(titles_24):
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'inclusion mod' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    one_minus_spec = 1 - np.array(spec_24[i])
    sensitivity = np.array(recall_24[i])
    plt.plot(
        one_minus_spec,
        sensitivity,
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_24[i]),
    )
plt.legend(fontsize=10)
plt.xlabel('1 - Specificity', fontsize=14)
plt.ylabel('Sensitivity', fontsize=14)
plt.axis('equal')
plt.title('ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_ROC.png', dpi=300)
plt.show()
# Precision-recall curves for the `*_24` result lists. The line style
# encodes the title: solid for 'original', dotted for 'inclusion mod',
# dashed otherwise (matched case-insensitively).
plt.figure(figsize=(9, 9))
for i, title in enumerate(titles_24):
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'inclusion mod' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    plt.plot(
        recall_24[i],
        prec_24[i],
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_24[i]),
    )
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.ylabel('Precision (PPV)', fontsize=14)
plt.legend(fontsize=10)
plt.axis('equal')
plt.title('Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# Side-by-side ROC panels (girls left, boys right) for the `*_best` result
# lists. The line style encodes the title: solid for 'original', dotted for
# 'mod.', dashed otherwise (matched case-insensitively).
plt.figure(figsize=(20, 10))

# Left panel: only titles containing 'girls'.
plt.subplot(1, 2, 1)
for i, title in enumerate(titles_best):
    if 'girls' not in title:
        continue
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'mod.' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    plt.plot(
        1 - np.array(spec_best[i]),
        np.array(recall_best[i]),
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_best[i]),
    )
plt.legend(fontsize=10)
plt.ylabel('Sensitivity', fontsize=14)
plt.xlabel('1 - Specificity', fontsize=14)
plt.axis('equal')
plt.title('Girls ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid(True)
plt.tight_layout()

# Right panel: only titles containing 'boys'. The filter has to look at
# titles_best (the list being enumerated); indexing titles_total with the
# same position would test an unrelated title.
plt.subplot(1, 2, 2)
for i, title in enumerate(titles_best):
    if 'boys' not in title:
        continue
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'mod.' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    plt.plot(
        1 - np.array(spec_best[i]),
        np.array(recall_best[i]),
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_best[i]),
    )
plt.legend(fontsize=10)
plt.xlabel('1 - Specificity', fontsize=14)
plt.axis('equal')
plt.title('Boys ROC Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid(True)
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()
# Side-by-side precision-recall panels (girls left, boys right) for the
# `*_best` result lists. The line style encodes the title: solid for
# 'original', dotted for 'mod.', dashed otherwise (matched
# case-insensitively).
plt.figure(figsize=(20, 10))

# Left panel: only titles containing 'girls'.
plt.subplot(1, 2, 1)
for i, title in enumerate(titles_best):
    if 'girls' not in title:
        continue
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'mod.' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    plt.plot(
        recall_best[i],
        prec_best[i],
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_best[i]),
    )
plt.ylabel('Precision (PPV)', fontsize=14)
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.legend(fontsize=8)
plt.axis('equal')
plt.title('Girls Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()

# Right panel: only titles containing 'boys'.
plt.subplot(1, 2, 2)
for i, title in enumerate(titles_best):
    if 'boys' not in title:
        continue
    lowered = title.lower()
    if 'original' in lowered:
        curve_style = '-'
    elif 'mod.' in lowered:
        curve_style = ':'
    else:
        curve_style = '--'
    plt.plot(
        recall_best[i],
        prec_best[i],
        linestyle=curve_style,
        label=title + ' - AUC={:0.2f}'.format(auc_best[i]),
    )
plt.xlabel('Recall (Sensitivity)', fontsize=14)
plt.legend(fontsize=8)
plt.axis('equal')
plt.title('Boys Precision-Recall Curve: Obesity Predicted at 5 years', fontsize=18)
plt.grid()
plt.tight_layout()
# plt.savefig(newdir+'/Pediatric_Girls_PR.png', dpi=300)
plt.show()